import math
import numpy as np
import sklearn
from sklearn.kernel_ridge import KernelRidge
from sklearn.svm import SVC
from sklearn.metrics import cohen_kappa_score
from sklearn.linear_model import OrthogonalMatchingPursuit
from sklearn.linear_model import OrthogonalMatchingPursuitCV
from sklearn.datasets import make_sparse_coded_signal

import cvxpy as cp
# Seed NumPy's global RNG once at import time so that code drawing from
# np.random (e.g. the random gate directions in our_kernel) is reproducible.
seed=42
np.random.seed(seed=seed)

def ridge_regression(K1, K2, y1, y2, alpha, c):
    """Kernel ridge classification on precomputed kernels; return accuracy.

    Args:
        K1: Kernel matrix handed to ``KernelRidge.fit``.
        K2: 2-D kernel matrix handed to ``KernelRidge.predict``; its first
            dimension is used as the number of evaluated samples.
        y1: Integer class indices used to index the rows of ``np.eye(c)``.
        y2: Reference labels compared element-wise with the predictions.
        alpha: Regularisation strength forwarded to ``KernelRidge``.
        c: Number of classes (width of the one-hot targets).

    Returns:
        Fraction of predicted class indices that equal ``y2``.
    """
    num_eval, _ = K2.shape

    # One-hot targets shifted by 1/c so every target row sums to zero.
    targets = np.eye(c)[y1] - 1.0 / c

    model = KernelRidge(kernel="precomputed", alpha=alpha)
    model.fit(K1, targets)

    scores = model.predict(K2)
    predicted = scores.argmax(axis=1)
    num_correct = np.sum(predicted == y2)
    return 1.0 * num_correct / num_eval

def svm(K1, K2, y1, y2, C, c):
    """Precomputed-kernel SVM classification; return accuracy.

    Args:
        K1: Kernel matrix handed to ``SVC.fit``.
        K2: 2-D kernel matrix handed to ``SVC.predict``; its first dimension
            is used as the number of evaluated samples.
        y1: Labels used for fitting.
        y2: Reference labels compared element-wise with the predictions.
        C: Penalty parameter forwarded to ``SVC``.
        c: Not used by this function; present so the signature parallels
            ``ridge_regression``.

    Returns:
        Fraction of predictions that equal ``y2``.
    """
    num_eval, _ = K2.shape

    model = SVC(kernel="precomputed", C=C, cache_size=100000)
    model.fit(K1, y1)

    predicted = model.predict(K2)
    num_correct = np.sum(predicted == y2)
    return 1.0 * num_correct / num_eval

def gen_bound(K, y):
    """Compute a kernel-norm based bound value from a kernel matrix and targets.

    Solves ``K @ coeffs = y``, forms the quadratic form
    ``coeffs.T @ K @ coeffs``, and returns

        sum(sqrt(diag(quadratic form))) * sqrt(trace(K)) / K.shape[0]

    Args:
        K: Square kernel matrix (must be solvable by ``np.linalg.solve``).
        y: 2-D target matrix with one column per output; ``np.diag`` is
            applied to the quadratic form, so a 1-D ``y`` is not supported.

    Returns:
        The scalar value of the expression above.
    """
    num_samples = K.shape[0]
    coeffs = np.linalg.solve(K, y)

    # One squared norm per output column sits on the diagonal.
    quad_form = np.dot(np.dot(coeffs.T, K), coeffs)
    column_norms = np.sqrt(np.diag(quad_form))

    kernel_scale = np.sqrt(np.trace(K))
    return np.sum(column_norms) * kernel_scale / num_samples

def normalize(K):
    """Rescale a kernel matrix by its diagonal.

    Entry ``(i, j)`` is divided by ``sqrt(K[i, i] * K[j, j])``; the divisor is
    floored at ``1e-9`` to avoid division by zero.

    Args:
        K: Square kernel matrix.

    Returns:
        The rescaled matrix, same shape as ``K``.
    """
    diagonal = np.diag(K)
    denom = np.sqrt(np.outer(diagonal, diagonal))
    denom = np.clip(denom, 1e-9, None)
    return K / denom

def translate(K1, K2):
    """Centre two kernel matrices using the column means of ``K1`` and ``K2``.

    With ``r, s = K2.shape``, ``m1``/``m2`` the column means of ``K1``/``K2``
    and ``g`` the overall mean of ``K1``, the results are

        K1 - m1 1_r^T - 1_r m1^T + g
        K2 - m1 1_s^T - 1_r m2^T + g

    Args:
        K1: First kernel matrix. NOTE(review): the outer products only line
            up when K1 is square with side ``K2.shape[0]`` -- confirm callers
            pass K2 with that orientation.
        K2: Second kernel matrix (2-D).

    Returns:
        Tuple ``(K1_centred, K2_centred)``.
    """
    num_rows, num_cols = K2.shape
    col_mean_1 = np.mean(K1, axis=0)
    col_mean_2 = np.mean(K2, axis=0)
    ones_rows = np.ones(num_rows)
    ones_cols = np.ones(num_cols)
    grand_mean = np.mean(K1)

    centred_1 = (
        K1
        - np.outer(col_mean_1, ones_rows)
        - np.outer(ones_rows, col_mean_1)
        + grand_mean
    )
    centred_2 = (
        K2
        - np.outer(col_mean_1, ones_cols)
        - np.outer(ones_rows, col_mean_2)
        + grand_mean
    )
    return centred_1, centred_2


def relu(x):
    """Element-wise rectifier: the larger of 0 and ``x``."""
    floor = 0
    return np.maximum(floor, x)
def drelu(x):
    """Return the mask of non-negative entries of ``x`` (zero counts as True)."""
    is_active = x >= 0
    return is_active
def our_kernel(X,y,Xtest,ytest,beta2):
    """Fit a gated-ReLU model with a convex group-lasso program; return test accuracy.

    Labels are binarised (values above 1 are capped at 1, then mapped through
    ``2*y - 1``, so 0 -> -1 and >=1 -> +1).  Random gate patterns are sampled
    from standardised, bias-augmented training features, duplicate patterns
    are dropped, and a least-squares objective with a mixed (2,1)-norm
    penalty on the per-gate weights is minimised with CVXPY/MOSEK.

    Args:
        X: Training features, 2-D array of shape (n, features).
        y: Training labels (non-negative class indices). Not modified.
        Xtest: Test features, 2-D array of shape (n_test, features).
        ytest: Test labels (non-negative class indices). Not modified.
        beta2: Weight of the group-lasso regulariser.

    Returns:
        Fraction of test samples whose predicted sign equals the +/-1 label.

    Raises:
        RuntimeError: If the solver returns without a solution.
    """
    # Binarise on fresh arrays: the caller's label arrays are left untouched,
    # and the float cast keeps unsigned dtypes from wrapping in ``2*y - 1``.
    y = 2 * np.minimum(np.asarray(y).astype(float), 1) - 1
    ytest = 2 * np.minimum(np.asarray(ytest).astype(float), 1) - 1

    # Standardise with training statistics only; constant columns keep a
    # unit scale so they do not divide by zero.
    mean = np.mean(X, axis=0, keepdims=True)
    std = np.std(X, axis=0, keepdims=True)
    std[std == 0] = 1
    Xtemp = (X - mean) / std
    Xtesttemp = (Xtest - mean) / std

    # Append a constant column so the random gate directions include a bias.
    Xb = np.append(Xtemp, np.ones((X.shape[0], 1)), axis=1)
    Xbtest = np.append(Xtesttemp, np.ones((Xtest.shape[0], 1)), axis=1)
    n, d = Xb.shape
    n2 = Xbtest.shape[0]
    y = y.reshape((n,))
    ytest = ytest.reshape((n2,))

    # Sample random directions and keep one representative per distinct
    # activation pattern on the training set.
    Pall = 300
    W = np.random.randn(d, Pall)
    dmat, ind = np.unique(drelu(Xb @ W), axis=1, return_index=True)
    W = W[:, ind]
    m1 = dmat.shape[1]

    Uopt1 = cp.Variable((d - 1, m1))
    b1 = cp.Variable((1, m1))

    # NOTE(review): the regression term uses the raw X / Xtest while the gates
    # come from the standardised features. Train and test are consistent, so
    # this is preserved as-is -- confirm it is intentional.
    yopt1 = cp.sum(cp.multiply(dmat, (X @ Uopt1 + b1)), axis=1)

    # Group lasso: one 2-norm per gate over its weights and bias.
    penalty = cp.mixed_norm(cp.vstack([Uopt1, b1]).T, 2, 1)
    cost = cp.sum_squares(y - yopt1) / n + beta2 * penalty

    prob = cp.Problem(cp.Minimize(cost))
    prob.solve(solver=cp.MOSEK, warm_start=True, verbose=False)
    if Uopt1.value is None or b1.value is None:
        raise RuntimeError(
            "our_kernel: solver returned no solution (status: %s)" % prob.status
        )

    dmat_test = drelu(Xbtest @ W)
    scores = np.sum(dmat_test * (Xtest @ Uopt1.value + b1.value), axis=1)
    ytest_est = np.sign(scores)

    acc_test_cvx = np.sum(ytest_est == ytest) / n2
    return acc_test_cvx


    
